{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Building and Using Sparse Matrices"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as pt\n",
        "\n",
        "import scipy.sparse as sps"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Building a sparse matrix"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "**COO**rdinate format is typically convenient for building (\"assembling\") a sparse matrix:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.5/dist-packages/IPython/core/formatters.py:92: DeprecationWarning: DisplayFormatter._ipython_display_formatter_default is deprecated: use @default decorator instead.\n",
            "  def _ipython_display_formatter_default(self):\n",
            "/usr/local/lib/python3.5/dist-packages/IPython/core/formatters.py:669: DeprecationWarning: PlainTextFormatter._singleton_printers_default is deprecated: use @default decorator instead.\n",
            "  def _singleton_printers_default(self):\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "<10x10 sparse matrix of type '<class 'numpy.float64'>'\n",
              "\twith 3 stored elements in COOrdinate format>"
            ]
          },
          "execution_count": 3,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "data = [5, 6, 7]\n",
        "rows = [1, 1, 2]\n",
        "columns = [2, 4, 6]\n",
        "\n",
        "A = sps.coo_matrix(\n",
        "        (data, (rows, columns)),\n",
        "        shape=(10, 10), dtype=np.float64)\n",
        "A"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "matrix([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  5.,  0.,  6.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  7.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
              "        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])"
            ]
          },
          "execution_count": 4,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "A.todense()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "3"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "A.nnz"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "<matplotlib.lines.Line2D at 0x7fcfcdd38f28>"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        },
        {
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPcAAAD7CAYAAAC2TgIoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAACbpJREFUeJzt3VuopXd5x/HvL9lKEhP0siSDkVSGHkBlLoxtKHk9gKKg\nNy1EbQUv9MZD0CIJ3mRdFW9EUurNNBpQYgsOQkVsEZGVi16Y2mTqYSaOEGgSRwNStIgIJn16sTdD\n3Htmr3eyDu9az3w/MGStnT9rPWzmO+9hv/tdqSok9XPd1ANIWg/jlpoybqkp45aaMm6pKeOWmtpo\n3EnekeTJJBeS3LfJ934pkpxI8p0k55L8IMnHp55pjCTXJXk8ydennmWMJK9M8tUk55P8KMmdU8+0\nSJJPJPlhku8neSTJy6ee6bCNxZ3kOuAfgLcDfwq8N8kfber9X6LngU9W1Z8AfwZ8ZAdmBrgXODf1\nEFfhQeCbVfXHwOuB8xPPc6wktwIfA05V1euAPeCeaac6apNb7jcCP6mq/66q3wH/DLxng+9/1arq\n51V19uDxr9n/S3fbtFMdL8kJ4J3AQ1PPMkaSW4C/qKqHAarq+ar634nHGuN64BVJ9oCbgIsTz3PE\nJuO+DXjmRc+fZctDebEkrwHeAHx32kkW+hzwKWBXLj28A/hFkocPDiVOJ7lx6qGOU1UXgc8CTwM/\nBX5ZVd+edqqjNhl3LvO1nfgLmORm4Axw78EWfCsleRfw3MHeRrj893zb7AGngM9X1SngN8D90450\nvCSvYn+v83bgVuDmJO+bdqqjNhn3s8CrX/T8BFu4K3PYwW7XGeDLVfUvU8+zwF3Au5M8BfwT8OYk\nX5p4pkWeBZ6pqu8dPD/Dfuzb7G3AU1X1P1X1AvA14M8nnumITcb9H8Brk9x+cGbxHmAXzuZ+EThX\nVQ9OPcgiVfXpqnp1Vd3B/vf3O1X1gannOk5VPQc8k+TkwZfeyvafDHwaeFOSG5KE/Zm37iTg3qbe\nqKpeSPJR4Fvs/6Pyharaum/IiyW5C3g/8IMkT7B/GPHpqvq3aSdr5+PAI0leBjwFfHDieY5VVY8l\nOQM8Afzu4L+np53qqPgrn1JPXqEmNWXcUlPGLTVl3FJTxi01tbIfhSXxtLs0kao6cjXiSrfcVTXq\nzwMPPDB67Tb82bV5nfnamvdK3C2XmjJuqalJ4h6GYYq3fcl2bV5w5k3Y9nlXdvlpklrVa0kaLwn1\nUk+o7dq9zySN2HIf3PvsAvu/1naR/V/dvKeqnjy0zi23NIFlttw7d+8zSePi3ul7n0nXqjFx7+y9\nz6Rr2ZjLT0ff+2w2m116PAzD1v+oQNpF8/mc+Xy+cN2YE2rXAz9m/4Taz4DHgPfWoVskeUJNmsaV\nTqgt3HLXDt77TJIXsUg7b6mLWCTtHuOWmjJuqSnjlpoybqkp45aaMm6pKeOWmjJuqSnjlpoybqkp\n45aaMm6pqZV9VlgXH/7wZ7hw4bcL1508eQOnT9+/gYkW28WZtX7GfciFC7/l0UdnI1aOWbMZuziz\n1s/dcqkp45aaMm6pKeOWmjJuqSnjlpoybqkp45aa8iKWQ06evIExF3vsr9sOuziz1s8PJZB2nB9K\nIF1jjFtqyrilpoxbasq4paaMW2rKuKWmjFtqyrilpoxbaspryzUJ79i6fgvjTnIC+BLwB8ALwD9W\n1d+vezD15h1b12/Mlvt54JNVdTbJzcB/JvlWVT255tkkLWHhMXdV/byqzh48/jVwHrht3YNJWs5V\nnVBL8hrgDcB31zGMpNUZfULtYJf8DHDvwRb8iNlsdunxMAwMw7DkeJIOm8/nzOfzhetG3awhyR7w\nDeBfq+rBK6zxZg0abRhmo06o3X33jPl88bpr2bI3a/gicO5KYUvaPgvjTnIX8H7gLUmeSPJ4knes\nfzRJy1h4zF1V/w5cv4FZJK2QV6hpEt6xdf28+6m047z7qXSNMW6pKeOWmjJuqSnjlpoybqkp45aa\nMm6pKeOWmjJuqSnjlpoybqkp45aaMm6pKeOWmjJuqSnjlpoybqkp45aaMm6pKeOWmjJuqSnjlpoy\nbqkp45aaMm6pKeOWmjJuqSnjlpoybqkp45aaMm6pKeOWmjJuqanRcSe5LsnjSb6+zoEkrcbVbLnv\nBc6taxBJqzUq7iQngHcCD613HEmrMnbL/TngU0CtcRZJK7S3aEGSdwHPVdXZJAOQK62dzWaXHg/D\nwDAMy08o6ffM53Pm8/nCdak6fmOc5O+AvwaeB24EbgG+VlUfOLSuFr2WpNVLQlUd2egujPvQi9wN\n/G1Vvfsy/8+4pQlcKW5/zi01dVVb7mNfyC23NAm33NI1xrilpoxbasq4paaMW2rKuKWmjFtqyril\npoxbasq4paaMW2rKuKWmjFtqyrilpoxbasq4paaMW2rKuKWmjFtqyrilpoxbasq4paaMW2rKuKWm\njFtqyrilpoxbasq4paaMW2rKuKWmjFtqyrilpoxbasq4paaMW2pqVNxJXpnkq0nOJ/lRkjvXPZik\n5eyNXPcg8M2q+qske8BNa5xJ0gqkqo5fkNwCnK2qP1ywrha9lqTVS0JV5fDXx+yW3wH8IsnDSR5P\ncjrJjasfUdIqjYl7DzgFfL6qTgG/Ae5f61SSljbmmPtZ4Jmq+t7B8zPAfZdbOJvNLj0ehoFhGJYc\nT9Jh8/mc+Xy+cN3CY26AJI8CH6qqC0keAG6qqvsOrfGYW5rAlY65x8b9euAh4GXAU8AHq+pXh9YY\ntzSBpeIe+QbGLU1gmbPlknaQcUtNGbfUlHFLTRm31JRxS00Zt9SUcUtNGbfUlHFLTRm31JRxS00Z\nt9SUcUtNGbfUlHFLTRm31JRxS00Zt9SUcUtNGbfUlHFLTRm31JRxS00Zt9SUcUtNGbfUlHFLTRm3\n1JRxS00Zt9SUcUtNGbfUlHFLTRm31JRxS02NijvJJ5L8MMn3kzyS5OXrHkzSchbGneRW4GPAqap6\nHbAH3LPuwSQtZ2/kuuuBVyT5P+Am4OL6RpK0Cgu33FV1Efgs8DTwU+CXVfXtdQ8maTkLt9xJXgW8\nB7gd+BVwJsn7quorh9fOZrNLj4dhYBiGlQ0qad98Pmc+ny9cl6o6fkHyl8Dbq+pDB8//Brizqj56\naF0tei1Jq5eEqsrhr485W/408KYkNyQJ8Fbg/KoHlLRaY465HwPOAE8A/wUEOL3muSQtaeFu+egX\ncrdcmsQyu+WSdpBxS00Zt9SUcUtNGbfUlHFLTRm31JRxS00Zt9SUcUtNGbfUlHFLTRm31JRxS00Z\nt9SUcUtNGbfUlHFLTRm31JRxS00Zt9SUcUtNGbfUlHFLTRm31NQkcY/5hMJtsmvzgjNvwrbPa9wj\n7Nq84MybsO3zulsuNWXcUlMr/ZTPlbyQpKt2uU/5XFnckraLu+VSU8YtNWXcUlPGLTVl3FJT/w/y\nOTHdtWYlVgAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<matplotlib.figure.Figure at 0x7fcfce81a780>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "pt.spy(A)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For a COO matrix, the juicy attributes are `data`, `row`, and `col`."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "row: [1 1 2]\n",
            "col: [2 4 6]\n",
            "data: [ 5.  6.  7.]\n"
          ]
        }
      ],
      "source": [
        "print(\"row:\", A.row)\n",
        "print(\"col:\", A.col)\n",
        "print(\"data:\", A.data)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "**COO**rdinate format is not the only format. \n",
        "\n",
        "There is also [**C**ompressed **S**parse **R**ow](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_.28CSR.2C_CRS_or_Yale_format.29):"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "<10x10 sparse matrix of type '<class 'numpy.float64'>'\n",
              "\twith 3 stored elements in Compressed Sparse Row format>"
            ]
          },
          "execution_count": 12,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "Acsr = A.tocsr()\n",
        "Acsr"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For Compressed Sparse Row, look in `data`, `indptr`, and `indices`."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "indptr: [0 0 2 3 3 3 3 3 3 3 3]\n",
            "indices: [2 4 6]\n",
            "data: [ 5.  6.  7.]\n"
          ]
        }
      ],
      "source": [
        "print(\"indptr:\", Acsr.indptr)\n",
        "print(\"indices:\", Acsr.indices)\n",
        "print(\"data:\", Acsr.data)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Performance of the Matrix-Vector Product"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The following code randomly generates a sparse matrix that has a given `fill_percent` percentage of nonzero entries:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "collapsed": true
      },
      "outputs": [],
      "source": [
        "fill_percent = 5\n",
        "\n",
        "size = 1000\n",
        "nentries = size**2 * fill_percent // 100\n",
        "\n",
        "data = np.random.randn(nentries)\n",
        "rows = (np.random.rand(nentries)*size).astype(np.int32)\n",
        "columns = (np.random.rand(nentries)*size).astype(np.int32)\n",
        "\n",
        "B_coo = sps.coo_matrix(\n",
        "        (data, (rows, columns)),\n",
        "        shape=(size, size), dtype=np.float64)\n",
        "\n",
        "B_csr = sps.csr_matrix(B_coo)\n",
        "\n",
        "B_dense = B_coo.todense()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Next, we time matrix-vector multiplication for different versions of `B`:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 26,
      "metadata": {
        "collapsed": false
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "time: 1.96073\n"
          ]
        }
      ],
      "source": [
        "vec = np.random.randn(size)\n",
        "\n",
        "from time import time\n",
        "start = time()\n",
        "\n",
        "for i in range(2000):\n",
        "    B_dense.dot(vec)\n",
        "    \n",
        "print(\"time: %g\" % (time() - start))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.1+"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}